/*******************************************************************************
																				
	DESCRIPTION:  	This do file generates panels A and B of Table 3 and Appendix
					Table A3 in full.

	
*******************************************************************************/

* Start from an empty session
clear all

* Fix the random-number seed
set seed 2110

* Prefix used in the names of the output files written below
global id_code "111_1"

* Year of the prediction files to load
local year "2006"

/*******************************************************************************
*	Predicted job-finding rates - time series - predictions done by year
********************************************************************************/

* Results frame: one row per model with its name, the R-squared, the slope
* and the covariance of the empirical vs. the predicted outcome
frame create sum str1000(vars) R_sq beta cov

* Model lists, in the order in which their rows are posted to the frame.
* Sequential sub-models, ending with the full model:
local seq_models Full_SeqDrop_incIndiv Full_SeqDrop_incOther ///
	Full_SeqDrop_emplHist Full_SeqDrop_incHist Full_SeqDrop_migHist ///
	Full_SeqDrop_indu Full_SeqDrop_mun Full
* Marginal sub-models:
local marg_models Full_Marg_incIndiv Full_Marg_incOther ///
	Full_Marg_emplHist Full_Marg_incHist Full_Marg_migHist ///
	Full_Marg_indu Full_Marg_mun

foreach spec in `seq_models' `marg_models' {

	* Prediction file of this model for the chosen year
	use "${data}/003_MainWithEnsemblePred_`spec'_`year'.dta", clear

	* Slope and R-squared from regressing the empirical JFR on the predicted JFR
	regress emplAft6M_0M_In p_emplAft6M_0M_In
	local slope = _b[p_emplAft6M_0M_In]
	local fit = e(r2)

	* Covariance between the empirical JFR and the predicted JFR
	correlate emplAft6M_0M_In p_emplAft6M_0M_In, covariance
	local covar = r(cov_12)

	* Append one row to the results frame
	frame post sum ("`spec'") (`fit') (`slope') (`covar')

}

/*******************************************************************************
*	Main text table: sequential vs marginal contributions
********************************************************************************/

* Build the main-text table in its own frame, keeping only the model names
* and the R-squared:
frame copy sum main_table
frame change main_table
keep vars R_sq

* Rows 1-8 hold the sequential models and rows 9-15 the marginal models, in
* the order in which they were posted. Each sequential row 2-8 takes the name
* of the marginal row 7 positions below it, so that after stripping the prefix
* both rows share the same short name and can be paired by the reshape.
replace vars = "Basic" if vars == "Full_SeqDrop_incIndiv"
replace vars = vars[_n + 7] in 2/8
replace vars = subinstr(vars, "Full_Marg_", "", .)

* Marginal / sequential indicator:
gen j = 1 in 1/8
replace j = 2 in 9/15
label define j 1 "Sequential" 2 "Marginal"
label values j j

* Column position (1-8), identical for the two rows that share a name:
gen ord = _n in 1/8
replace ord = _n-7 in 9/15

* Wide format: R_sq1 = sequential, R_sq2 = marginal (missing for "Basic")
reshape wide R_sq, i(vars) j(j)
sort ord
drop ord

* Percentage changes:
*   incr1 = sequential model vs. the previous sequential model
*   incr2 = marginal model vs. the first row ("Basic")
gen incr1_temp = 100 * (R_sq1 / R_sq1[_n - 1] - 1) if _n > 1
gen incr2_temp = 100 * (R_sq2 / R_sq1[1] - 1)

* Format as signed percentages. Stata treats missing as larger than any
* number, so a plain "> 0" test would label missing changes as "+.\%";
* rows without a change are left empty instead.
gen incr1 = cond(incr1_temp > 0, "+", "") + string(incr1_temp, "%9.1f") + "\%" ///
	if !missing(incr1_temp)
gen incr2 = cond(incr2_temp > 0, "+", "") + string(incr2_temp, "%9.1f") + "\%" ///
	if !missing(incr2_temp)

drop incr1_temp incr2_temp

* Re-order variables:
order vars R_sq1 incr1 R_sq2 incr2

* Write the main-text table (panel A: sequential, panel B: marginal) as a
* LaTeX fragment, reading R_sq1, R_sq2, incr1 and incr2 from the current
* frame by row number (rows 1-8 = columns (1)-(8)).
* The document preamble and the closing lines are written with a leading
* percent sign, i.e. as LaTeX comments, so the file opens a tabular but does
* not close it - presumably the remaining panels of Table 3 are appended
* elsewhere; TODO confirm.
* Column (1) of panel B reuses R_sq1[1], the first row of the sequential
* series, and shows "-" as its change.
file open myfile using "${output}/${id_code}_Explanatory_power_of_variables_Submodels_table.tex", write replace

file write myfile "%\documentclass{article}" _newline ///
	"%\usepackage{booktabs}" _newline ///
	"%\usepackage[margin=1in]{geometry}" _newline ///
	"%\begin{document}" _newline ///
	"%\begin{table}[h] \centering" _newline ///
	"\footnotesize \begin{tabular}{l c c c c c c c c}" _newline ///
	"\hline \hline \addlinespace[3ex]" _newline ///
	/// Panel A: sequential sub-models, R-squared and change vs the previous column
	"& \multicolumn{8}{c}{\large \textbf{A. Sub-models of Baseline - Sequential}} \\ \addlinespace[3ex]" _newline ///
	" & \normalsize (1) & \normalsize (2) & \normalsize (3) & \normalsize (4) & \normalsize (5) & \normalsize (6) & \normalsize (7) & \normalsize (8) \\ \addlinespace[1.5ex]" _newline ///
	" \cline{2-9} \addlinespace[1.5ex]" _newline ///
	"\normalsize \(R^2(\hat{F}_{0}, F_{0})\) & \normalsize `:di %9.3f `= R_sq1[1]'' & \normalsize `:di %9.3f `=R_sq1[2]'' & \normalsize `:di %9.3f `=R_sq1[3]'' & \normalsize `:di %9.3f `=R_sq1[4]'' & \normalsize `:di %9.3f `=R_sq1[5]'' & \normalsize `:di %9.3f `=R_sq1[6]'' & \normalsize `:di %9.3f `=R_sq1[7]'' & \normalsize `:di %9.3f `=R_sq1[8]''  \\ \addlinespace[0.2cm]" _newline ///
	"Change (\(j\)) vs (\(j-1\)) & - & `= incr1[2]' & `= incr1[3]' & `= incr1[4]' & `= incr1[5]' & `= incr1[6]' & `= incr1[7]' & `= incr1[8]' \\ \addlinespace[3ex]" _newline ///
	" \cline{2-9} \addlinespace[1.5ex]" _newline ///
	"\small Socio-demographics & X & X & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Labour Income & & X & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Other Income & & & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Employment History & & & & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Income History & & & & & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Migration History & & & & & & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Industry & & & & & & & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Municipality & & & & & & & & X \\ \addlinespace[0cm]" _newline ///
	" \addlinespace[6ex]" _newline ///
	/// Panel B: marginal sub-models, R-squared and change vs column (1)
	"& \multicolumn{8}{c}{\large \textbf{B. Sub-models of Baseline - Marginal}} \\ \addlinespace[3ex]" _newline ///
	" & \normalsize (1) & \normalsize (2) & \normalsize (3) & \normalsize (4) & \normalsize (5) & \normalsize (6) & \normalsize (7) & \normalsize (8) \\ \addlinespace[1.5ex] " _newline ///
	"\cline{2-9}  \addlinespace[1.5ex]" _newline ///
	"\normalsize \(R^2(\hat{F}_{0}, F_{0})\) & \normalsize `:di %9.3f `= R_sq1[1]'' & \normalsize `:di %9.3f `=R_sq2[2]'' & \normalsize `:di %9.3f `=R_sq2[3]'' & \normalsize `:di %9.3f `=R_sq2[4]'' & \normalsize `:di %9.3f `=R_sq2[5]'' & \normalsize `:di %9.3f `=R_sq2[6]'' & \normalsize `:di %9.3f `=R_sq2[7]'' & \normalsize `:di %9.3f `=R_sq2[8]'' \\ \addlinespace[0.2cm]" _newline ///
	"Change (\(j\)) vs (1) & - & `= incr2[2]' & `= incr2[3]' & `= incr2[4]' & `= incr2[5]' & `= incr2[6]' & `= incr2[7]' & `= incr2[8]' \\ \addlinespace[3ex]" _newline ///
	" \cline{2-9} \addlinespace[1.5ex]" _newline ///
	"\small Socio-demographics & X & X & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Labour Income & & X & & & & & &  \\ \addlinespace[0cm]" _newline ///
	"\small Other Income & & & X & & & & & \\ \addlinespace[0cm]" _newline ///
	"\small Employment History & & & & X & & & & \\ \addlinespace[0cm]" _newline ///
	"\small Income History & & & & & X & & & \\ \addlinespace[0cm]" _newline ///
	"\small Migration History & & & & & & X & & \\ \addlinespace[0cm]" _newline ///
	"\small Industry & & & & & & & X & \\ \addlinespace[0cm]" _newline ///
	"\small Municipality & & & & & & & & X \\ \addlinespace[0cm]" _newline ///
	"\addlinespace[6ex]" _newline ///
	"%\hline \hline \addlinespace[1.5ex]" _newline ///
	"%\end{tabular}" _newline ///
	"%\end{table}" _newline ///
	"%\end{document}"
file close myfile	
	
	
	
/*******************************************************************************
*	Appendix table: ML vs Linear model
********************************************************************************/
* Back to the default frame, where the linear-model files are loaded one by one
frame change default

local year 2006
local model Full

* File-name infixes of the eight linear specifications; the last one, a bare
* underscore, yields the file name without any sub-model part
local linear_specs _SeqDrop_incIndiv_ _SeqDrop_incOther_ _SeqDrop_emplHist_ ///
	_SeqDrop_incHist_ _SeqDrop_migHist_ _SeqDrop_indu_ _SeqDrop_mun_ _

foreach spec of local linear_specs {

	use "${data}/116_Linear_`model'`spec'`year'.dta", clear

	* Post the first observation of the stored R-squared variable; the beta
	* and cov columns stay missing for the linear models.
	* NOTE(review): the year in the variable name is hard-coded as 2006 while
	* the file name uses the year local - confirm before changing the year.
	frame post sum ("Linear`spec'") (R_`model'_2006[1]) (.) (.)

}

* Build the linear-model table in its own frame:
frame copy sum linear_table
frame change linear_table
keep vars R_sq

* Keep the linear-model rows. They are selected by the "Linear" prefix given
* to them when they were posted, not by fixed row numbers, so the selection
* does not depend on how many ML models were posted before them.
keep if substr(vars, 1, 6) == "Linear"

* Everything below addresses rows 1-8 by position:
assert _N == 8

rename R_sq R_sq1

* Percentage change of each model vs. the previous one:
gen incr1_temp = 100 * (R_sq1 / R_sq1[_n - 1] - 1) if _n > 1

* Format as signed percentages. Stata treats missing as larger than any
* number, so a plain "> 0" test would label missing changes as "+.\%";
* rows without a change are left empty instead.
gen incr1 = cond(incr1_temp > 0, "+", "") + string(incr1_temp, "%9.1f") + "\%" ///
	if !missing(incr1_temp)

drop incr1_temp

* Define labels:
label variable R_sq1 "\(R^2(\hat{F}_{d}, F_{d})\)"
label variable incr1 "Change (\%)"

* Formats:
format R_sq* %8.4f

* Rename rows:
replace vars = "Basic Socio-demographics" in 1
replace vars = "Individual Income" in 2
replace vars = "Other Income" in 3
replace vars = "Employment History" in 4
replace vars = "Income History" in 5
replace vars = "Migration History" in 6
replace vars = "Industry" in 7
replace vars = "Municipality" in 8

* Appendix table (ML vs linear model): open the output file. The handle stays
* open after this command so that panel B can be written to the same file.
file open myfile using "${output}/${id_code}_Explanatory_power_of_variables_MLvsLinear_table.tex", write replace

* Panel A reads the sequential ML results (R_sq1, incr1) from main_table
frame change main_table

* Commented-out preamble, table header and panel A. The last line carries no
* continuation marker, so the command ends here regardless of what follows.
file write myfile "%\documentclass{article}" _newline ///
	"%\usepackage{booktabs}" _newline ///
	"%\usepackage[margin=1in]{geometry}" _newline ///
	"%\begin{document}" _newline ///
	"%\begin{table}[h] \centering" _newline ///
	"\footnotesize \begin{tabular}{l c c c c c c c c}" _newline ///
	"\hline \hline \addlinespace[3ex]" _newline ///
	///
	"& \multicolumn{8}{c}{\large \textbf{A. ML Model}} \\ \addlinespace[3ex]" _newline ///
	" & \normalsize (1) & \normalsize (2) & \normalsize (3) & \normalsize (4) & \normalsize (5) & \normalsize (6) & \normalsize (7) & \normalsize (8) \\ \addlinespace[1.5ex]" _newline ///
	" \cline{2-9} \addlinespace[1.5ex]" _newline ///
	"\normalsize \(R^2(\hat{F}_{0}, F_{0})\) & \normalsize `:di %9.3f `= R_sq1[1]'' & \normalsize `:di %9.3f `=R_sq1[2]'' & \normalsize `:di %9.3f `=R_sq1[3]'' & \normalsize `:di %9.3f `=R_sq1[4]'' & \normalsize `:di %9.3f `=R_sq1[5]'' & \normalsize `:di %9.3f `=R_sq1[6]'' & \normalsize `:di %9.3f `=R_sq1[7]'' & \normalsize `:di %9.3f `=R_sq1[8]''  \\ \addlinespace[0.2cm]" _newline ///
	"Change (\(j\)) vs (\(j-1\)) & - & `= incr1[2]' & `= incr1[3]' & `= incr1[4]' & `= incr1[5]' & `= incr1[6]' & `= incr1[7]' & `= incr1[8]' \\ \addlinespace[0.2cm]" _newline ///
	"\addlinespace[6ex]" _newline


* Panel B of the appendix table is filled from the linear-model frame: the
* values R_sq1[1]-R_sq1[8] and incr1[2]-incr1[8] below are read from
* linear_table by row number.
frame change linear_table	

* Write panel B, the rows marking which variable groups each column includes,
* and the closing rule and end of the tabular; then close the output file.
file write myfile 	"& \multicolumn{8}{c}{\large \textbf{B. Linear model}} \\ \addlinespace[3ex]" _newline ///
	" & \normalsize (1) & \normalsize (2) & \normalsize (3) & \normalsize (4) & \normalsize (5) & \normalsize (6) & \normalsize (7) & \normalsize (8) \\ \addlinespace[1.5ex]" _newline ///
	" \cline{2-9} \addlinespace[1.5ex]" _newline ///
	"\normalsize \(R^2(\hat{F}_{0}, F_{0})\) & \normalsize `:di %9.3f `= R_sq1[1]'' & \normalsize `:di %9.3f `=R_sq1[2]'' & \normalsize `:di %9.3f `=R_sq1[3]'' & \normalsize `:di %9.3f `=R_sq1[4]'' & \normalsize `:di %9.3f `=R_sq1[5]'' & \normalsize `:di %9.3f `=R_sq1[6]'' & \normalsize `:di %9.3f `=R_sq1[7]'' & \normalsize `:di %9.3f `=R_sq1[8]''  \\ \addlinespace[0.2cm]" _newline ///
	"Change (\(j\)) vs (\(j-1\)) & - & `= incr1[2]' & `= incr1[3]' & `= incr1[4]' & `= incr1[5]' & `= incr1[6]' & `= incr1[7]' & `= incr1[8]' \\ \addlinespace[3ex]" _newline ///
	"\cline{2-9} \addlinespace[1.5ex]" _newline ///
	"\small Socio-demographics & X & X & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Labour Income & & X & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Other Income & & & X & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Employment History & & & & X & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Income History & & & & & X & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Migration History & & & & & & X & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Industry & & & & & & & X & X \\ \addlinespace[0cm]" _newline ///
	"\small Municipality & & & & & & & & X \\ \addlinespace[0cm]" _newline ///
	"\addlinespace[3ex]" _newline ///
	/// Closing rule and end of the tabular; the table and document endings stay commented out
	"\hline \hline \addlinespace[1.5ex]" _newline ///
	"\end{tabular}" _newline ///
	"%\end{table}" _newline ///
	"%\end{document}"
file close myfile	
		